# Author: Ramona Roller, rroller@ethz.ch
# Date: October 2020
# Paper: Theory-Driven Statistics for the Digital Humanities:
#         Presenting Pitfalls and a Practical Guide by the Example of the Reformation
# Journal: Special Issue on 'Theorytellings' for Journal of Cultural Analytics
# Summary: Explain why a territory in 16th-century Holy Roman Empire became Protestant
#           or remained Catholic using a logistic regression.



####################################### 
### Load packages
#######################################
library(tidyverse)
library(texreg) # plotting pretty regression tables

####################################### 
### Load data
#######################################
path = "/home/.../"
terrs <- read.csv(paste(path, "theorytellings_data.csv", sep=","))


####################################### 
### Run model
#######################################
# Logistic regression
# unit of analysis: a territory
# DV: whether a territory became Protestant or remained Catholic
# IV 1: time-weighted number of letters reformers sent to territory.
# IV 2: time-weighted number of days reformers spent in territory
fit <- glm(switched ~ 
            letters +
            visits,
            data=terrs, family="binomial")


summary(fit)

# First check whether overall model is better than intercept model
# Since global F test only exist for lm we use chi-square test
# Numbers are from summary(fit)
# pchisq(null deviance (residuals of null model) - residual deviance (residuals of tested model),
#    null degrees of freedom (N observations - N variables (=1: intercept)) 
#    - residual dfs (N observations - N variables (3: 2 IVs + 1 intercept)))
# This outputs the p-value: should be small than 0.05 to reject H0
# H0: intercept and the current model are the same
1-pchisq(304.14-294.86, 261-259)



texreg(list(fit, fit),
       digits = 4, 
       stars = c(0.01, 0.05, 0.1), 
       single.row = TRUE,
       custom.model.names = c("No correction", "Bonferroni"),
       custom.coef.names = c('Intercept',
                             'Letters', 
                             'Visits'),
       caption = 'Logistic regression results to predict the first switch to 
       Protestantism of territories.
       Letters represent the mean time-weighted number of letters sent by reformers 
       to a territory before it became protestant or ceased to exist.
       Visits represent the mean time-weighted number of days reformers spent 
       in a territory before it became protestant or ceased to exist.
       Left: no correction for multiple hypothesis testing.
       The significance level is set to $0.1$.
       Right: Bonferroni correction. 
       The significance level is reduced to $0.05$, i.e., the uncorrected 
       significance level of $0.1$ is divided by $2$, the number of tested hypotheses. 
       With the Bonferroni correction, Letters is no longer significant.',
       caption.above = TRUE,
       label = "tab:logist regression",
       file = 'logit.tex')




